"""
URL Validator for SSRF Prevention

Validates URLs to prevent Server-Side Request Forgery (SSRF) attacks
by blocking requests to internal/private networks and enforcing safe schemes.
"""

import ipaddress
import os
import socket
from urllib.parse import urlparse
from typing import Optional
from loguru import logger

from ..settings.env_registry import get_env_setting


# Blocked IP ranges (RFC1918 private networks, localhost, link-local, etc.)
# nosec B104 - These hardcoded IPs are intentional for SSRF prevention (blocking private networks)
BLOCKED_IP_RANGES = [
    ipaddress.ip_network("127.0.0.0/8"),  # Loopback
    ipaddress.ip_network("::1/128"),  # IPv6 loopback
    ipaddress.ip_network("10.0.0.0/8"),  # Private network
    ipaddress.ip_network("172.16.0.0/12"),  # Private network
    ipaddress.ip_network("192.168.0.0/16"),  # Private network
    ipaddress.ip_network("169.254.0.0/16"),  # Link-local
    ipaddress.ip_network("fe80::/10"),  # IPv6 link-local
    ipaddress.ip_network("fc00::/7"),  # IPv6 unique local
    ipaddress.ip_network("0.0.0.0/8"),  # "This" network
    ipaddress.ip_network("100.64.0.0/10"),  # Shared address space
]

# AWS metadata endpoint (commonly targeted in SSRF attacks)
# nosec B104 - Hardcoded IP is intentional for SSRF prevention (blocking AWS metadata endpoint)
AWS_METADATA_IP = "169.254.169.254"

# Allowed URL schemes
ALLOWED_SCHEMES = {"http", "https"}


def is_ip_blocked(
    ip_str: str, allow_localhost: bool = False, allow_private_ips: bool = False
) -> bool:
    """
    Check if an IP address is in a blocked range.

    Args:
        ip_str: IP address as string
        allow_localhost: Whether to allow localhost/loopback addresses
        allow_private_ips: Whether to allow all RFC1918 private IPs (10.x, 172.16-31.x,
            192.168.x) plus localhost. Use for trusted self-hosted services like SearXNG.
            Note: AWS metadata endpoint (169.254.169.254) is ALWAYS blocked.

    Returns:
        True if IP is blocked, False otherwise
    """
    # Loopback ranges that can be allowed for trusted internal services
    # nosec B104 - These hardcoded IPs are intentional for SSRF allowlist
    LOOPBACK_RANGES = [
        ipaddress.ip_network("127.0.0.0/8"),  # IPv4 loopback
        ipaddress.ip_network("::1/128"),  # IPv6 loopback
    ]

    # RFC1918 private network ranges - allowed with allow_private_ips=True
    # nosec B104 - These hardcoded IPs are intentional for SSRF allowlist
    PRIVATE_RANGES = [
        ipaddress.ip_network("10.0.0.0/8"),  # Class A private
        ipaddress.ip_network("172.16.0.0/12"),  # Class B private
        ipaddress.ip_network("192.168.0.0/16"),  # Class C private
    ]

    try:
        ip = ipaddress.ip_address(ip_str)

        # ALWAYS block AWS metadata endpoint - critical SSRF target for credential theft
        if str(ip) == AWS_METADATA_IP:
            return True

        # Check if IP is in any blocked range
        for blocked_range in BLOCKED_IP_RANGES:
            if ip in blocked_range:
                # If allow_private_ips is True, skip blocking for private + loopback
                if allow_private_ips:
                    is_loopback = any(ip in lr for lr in LOOPBACK_RANGES)
                    is_private = any(ip in pr for pr in PRIVATE_RANGES)
                    if is_loopback or is_private:
                        continue
                # If allow_localhost is True, skip blocking for loopback only
                elif allow_localhost:
                    is_loopback = any(ip in lr for lr in LOOPBACK_RANGES)
                    if is_loopback:
                        continue
                return True

        return False

    except ValueError:
        # Invalid IP address
        return False


def validate_url(
    url: str,
    allow_redirects: bool = True,
    allow_localhost: bool = False,
    allow_private_ips: bool = False,
) -> bool:
    """
    Validate URL to prevent SSRF attacks.

    Checks:
    1. URL scheme is allowed (http/https only)
    2. Hostname is not an internal/private IP address
    3. Hostname does not resolve to an internal/private IP

    Args:
        url: URL to validate
        allow_redirects: Whether to allow redirects (future use)
        allow_localhost: Whether to allow localhost/loopback addresses.
            Set to True for trusted internal services like self-hosted
            search engines (e.g., searxng). Default False.
        allow_private_ips: Whether to allow all RFC1918 private IPs (10.x, 172.16-31.x,
            192.168.x) plus localhost. Use for trusted self-hosted services like SearXNG
            that may be running on a different machine on the local network.
            Note: AWS metadata endpoint (169.254.169.254) is ALWAYS blocked.

    Returns:
        True if URL is safe, False otherwise

    Note:
        SSRF validation can be disabled for testing by setting environment variables:
        - TESTING=true
        - PYTEST_CURRENT_TEST (automatically set by pytest)
        - LDR_SECURITY_SSRF_DISABLE_VALIDATION=true
    """
    # Bypass SSRF validation in test mode
    # Check environment variables at runtime (not import time) to ensure
    # pytest's PYTEST_CURRENT_TEST is captured when tests actually run
    disable_ssrf = get_env_setting(
        "security.ssrf.disable_validation", default=False
    )
    testing_mode = os.environ.get("TESTING", "").lower() in ("true", "1", "yes")
    pytest_current_test = os.environ.get("PYTEST_CURRENT_TEST")
    if testing_mode or pytest_current_test or disable_ssrf:
        logger.debug(f"SSRF validation bypassed in test mode for URL: {url}")
        return True

    try:
        parsed = urlparse(url)

        # Check scheme
        if parsed.scheme.lower() not in ALLOWED_SCHEMES:
            logger.warning(
                f"Blocked URL with invalid scheme: {parsed.scheme} - {url}"
            )
            return False

        hostname = parsed.hostname
        if not hostname:
            logger.warning(f"Blocked URL with no hostname: {url}")
            return False

        # Check if hostname is an IP address
        try:
            ip = ipaddress.ip_address(hostname)
            if is_ip_blocked(
                str(ip),
                allow_localhost=allow_localhost,
                allow_private_ips=allow_private_ips,
            ):
                logger.warning(
                    f"Blocked URL with internal/private IP: {hostname} - {url}"
                )
                return False
        except ValueError:
            # Not an IP address, it's a hostname - need to resolve it
            pass

        # Resolve hostname to IP and check
        try:
            # Get all IP addresses for hostname
            # nosec B104 - DNS resolution is intentional for SSRF prevention (checking if hostname resolves to private IP)
            addr_info = socket.getaddrinfo(
                hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
            )

            for info in addr_info:
                ip_str = info[4][0]  # Extract IP address from addr_info tuple

                if is_ip_blocked(
                    ip_str,
                    allow_localhost=allow_localhost,
                    allow_private_ips=allow_private_ips,
                ):
                    logger.warning(
                        f"Blocked URL - hostname {hostname} resolves to "
                        f"internal/private IP: {ip_str} - {url}"
                    )
                    return False

        except socket.gaierror as e:
            logger.warning(f"Failed to resolve hostname {hostname}: {e}")
            return False
        except Exception:
            logger.exception("Error during hostname resolution")
            return False

        # URL passes all checks
        return True

    except Exception:
        logger.exception(f"Error validating URL {url}")
        return False


def get_safe_url(
    url: Optional[str], default: Optional[str] = None
) -> Optional[str]:
    """
    Get URL if it's safe, otherwise return default.

    Args:
        url: URL to validate
        default: Default value if URL is unsafe

    Returns:
        URL if safe, default otherwise
    """
    if not url:
        return default

    if validate_url(url):
        return url

    logger.warning(f"Unsafe URL rejected: {url}")
    return default
